set environment

EDA

data structure transforming

summary(bf)
##     User_ID         Product_ID           Gender         
##  Min.   :1000001   Length:537577      Length:537577     
##  1st Qu.:1001495   Class :character   Class :character  
##  Median :1003031   Mode  :character   Mode  :character  
##  Mean   :1002992                                        
##  3rd Qu.:1004417                                        
##  Max.   :1006040                                        
##                                                         
##      Age              Occupation     City_Category     
##  Length:537577      Min.   : 0.000   Length:537577     
##  Class :character   1st Qu.: 2.000   Class :character  
##  Mode  :character   Median : 7.000   Mode  :character  
##                     Mean   : 8.083                     
##                     3rd Qu.:14.000                     
##                     Max.   :20.000                     
##                                                        
##  Stay_In_Current_City_Years Marital_Status   Product_Category_1
##  Length:537577              Min.   :0.0000   Min.   : 1.000    
##  Class :character           1st Qu.:0.0000   1st Qu.: 1.000    
##  Mode  :character           Median :0.0000   Median : 5.000    
##                             Mean   :0.4088   Mean   : 5.296    
##                             3rd Qu.:1.0000   3rd Qu.: 8.000    
##                             Max.   :1.0000   Max.   :18.000    
##                                                                
##  Product_Category_2 Product_Category_3    Purchase    
##  Min.   : 2.00      Min.   : 3.0       Min.   :  185  
##  1st Qu.: 5.00      1st Qu.: 9.0       1st Qu.: 5866  
##  Median : 9.00      Median :14.0       Median : 8062  
##  Mean   : 9.84      Mean   :12.7       Mean   : 9334  
##  3rd Qu.:15.00      3rd Qu.:16.0       3rd Qu.:12073  
##  Max.   :18.00      Max.   :18.0       Max.   :23961  
##  NA's   :166986     NA's   :373299
head(bf)
str(bf)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 537577 obs. of  12 variables:
##  $ User_ID                   : num  1e+06 1e+06 1e+06 1e+06 1e+06 ...
##  $ Product_ID                : chr  "P00069042" "P00248942" "P00087842" "P00085442" ...
##  $ Gender                    : chr  "F" "F" "F" "F" ...
##  $ Age                       : chr  "0-17" "0-17" "0-17" "0-17" ...
##  $ Occupation                : num  10 10 10 10 16 15 7 7 7 20 ...
##  $ City_Category             : chr  "A" "A" "A" "A" ...
##  $ Stay_In_Current_City_Years: chr  "2" "2" "2" "2" ...
##  $ Marital_Status            : num  0 0 0 0 0 0 1 1 1 1 ...
##  $ Product_Category_1        : num  3 1 12 12 8 1 1 1 1 8 ...
##  $ Product_Category_2        : num  NA 6 NA 14 NA 2 8 15 16 NA ...
##  $ Product_Category_3        : num  NA 14 NA NA NA NA 17 NA NA NA ...
##  $ Purchase                  : num  8370 15200 1422 1057 7969 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   User_ID = col_double(),
##   ..   Product_ID = col_character(),
##   ..   Gender = col_character(),
##   ..   Age = col_character(),
##   ..   Occupation = col_double(),
##   ..   City_Category = col_character(),
##   ..   Stay_In_Current_City_Years = col_character(),
##   ..   Marital_Status = col_double(),
##   ..   Product_Category_1 = col_double(),
##   ..   Product_Category_2 = col_double(),
##   ..   Product_Category_3 = col_double(),
##   ..   Purchase = col_double()
##   .. )
# User_ID: Unique identifier of shopper.
# Product_ID: Unique identifier of product. (No key given)
# Gender: Sex of shopper.
# Age: Age of shopper split into bins.
# Occupation: Occupation of shopper. (No key given)
# City_Category: Residence location of shopper. (No key given)
# Stay_In_Current_City_Years: Number of years stay in current city.
# Marital_Status: Marital status of shopper.
# Product_Category_1: Product category of purchase.
# Product_Category_2: Product may belong to other category.
# Product_Category_3: Product may belong to other category.
# Purchase: Purchase amount in dollars.

##Below are my steps for doing this project:
##1. Finishing EDA on every variable, getting information, comming out proper actions.
##2. When doing EDA, transferring every variable into right datatype
##3. Feature Engeering 
##4. Modelling

Gender

gender <- bf%>%select(User_ID,Gender)%>%group_by(User_ID)%>%distinct()
summary(gender)
##     User_ID           Gender         
##  Min.   :1000001   Length:5891       
##  1st Qu.:1001518   Class :character  
##  Median :1003026   Mode  :character  
##  Mean   :1003025                     
##  3rd Qu.:1004532                     
##  Max.   :1006040
##info: there are 5891 distinct customers

ggplot(gender)+
  geom_bar(aes(x=Gender,y=..count.., fill=Gender))+
  labs(title='Gender of Customers')+
  scale_fill_brewer(palette='PuBuGn')

sum(gender$Gender=='M')/nrow(gender)
## [1] 0.7171957
##info: there are 71.7% of cutomers are Males
##actn: take into account males' buying behavior

##However, maybe Females have stornger buying power?
##Q: whether females spend more?

genderSpend <- bf%>%select(User_ID, Gender, Purchase)%>%group_by(User_ID,Gender)%>% summarise(totalSpending=sum(Purchase),avgSpending=mean(Purchase))

ggplot(genderSpend, aes(totalSpending))+
  geom_histogram(bins = 200)+
  scale_x_continuous(labels=comma)+
  coord_cartesian(xlim=c(0,5000000))+
  facet_wrap(~Gender)+
  labs(title='Total Spending distribution devided by Gender')

ggplot(genderSpend, aes(avgSpending))+
  geom_histogram(bins=100)+
  facet_wrap(~Gender)+
  labs(title='Average Spending distribution devided by Gender')

ggplot(genderSpend,aes(Gender, totalSpending))+
  geom_bar(stat='summary', fun.y='mean', fill='gold2')+
  labs(title='TotalSpending mean devided by Gender')+
  geom_hline(yintercept=median(genderSpend$totalSpending[genderSpend$Gender=='F']), linetype='dashed', color='red')+
  geom_hline(yintercept=median(genderSpend$totalSpending[genderSpend$Gender!='F']), linetype='dashed', color='blue')

ggplot(genderSpend,aes(Gender, totalSpending))+
  geom_bar(stat='summary',fun.y='median')+
  labs(title='TotalSpending median devided by Gender')+
  scale_y_continuous(labels=comma)

##info: Males spend more, on average and on median. (F medain=398178, M median=565925; F mean= 699054,M mean=911963.2)
##info: Distribution(right skewed) says that both genders have some super shoppers (outliers spending very much)
##actn: Figuring out what super shoppers buy and what most of people buy

Top Sellers

topSeller <- bf%>%select(Product_ID)%>%group_by(Product_ID)%>%summarise(count=n())%>%arrange(desc(count))
sum(topSeller$count[1:5])/nrow(bf)
## [1] 0.01488903
##info: 3,623 products in Total
##info: Top 5 Selling product:P00265242,P00110742,P00025442,P00112142,P00057642 consisting 1.5% in total buying times

top5 <- bf[bf$Product_ID==topSeller$Product_ID[1:5],]%>% arrange(Product_ID)
##Q: if gender play a role

top5WithGender <- top5 %>% group_by(Product_ID,Gender)%>%summarise(count=n())
sum(top5WithGender$count[top5WithGender$Gender=='M'&top5WithGender$Product_ID!='P00265242'])/sum(top5WithGender$count[top5WithGender$Product_ID!='P00265242'])
## [1] 0.7963405
##info: if we uncount P00265242, Males consist more purchase of other top 4 seller. 
##79.6% of purchase of these 4 products are for males, while for whole dataset, males contribute 71.7%. This conclusion makes sense, since the majority of customers are males, and what they like are more likely become top seller.

Age

cust_Age <- bf%>%select(User_ID,Age,Gender,City_Category)%>% distinct()

ggplot(cust_Age,aes(Age,fill=Age))+
  geom_bar()+
  labs(title='Distribution of Customers Age')

##Q: if Age play a role in top5 selling products?
ggplot(top5, aes(Age))+
  geom_bar(aes(fill=Product_ID))+
  facet_wrap(~Product_ID,nrow=5)+
  labs(title='Distribution of Customers Age of Top 5 Selling Product')

## info:There are some deviation in 26-35 category but not very clear. 

##Q: age distribution when considering city?
ggplot(cust_Age, aes(as.factor(Age),fill=City_Category))+
  geom_bar(aes(y=(..count..)/sum(..count..)*100))+
  facet_wrap(~City_Category)+
  theme(axis.text.x = element_text(angle=45, size=10, color='grey'))+
  labs(y='Percentage', x='Age')

##info: city A customers are younger

City

cust_City <-bf%>%select(User_ID,City_Category,Stay_In_Current_City_Years,Purchase)%>%group_by(User_ID,City_Category,Stay_In_Current_City_Years)%>%summarise(sumPurchase=sum(Purchase))

ggplot(cust_City,aes(City_Category))+
  geom_bar(aes(fill=City_Category))+
  labs(title='Distribution of city category customers live in')

##info: more than 50% of cutomers live in category C.
##actn: looking for the geographical and cultural features of these cities.

##Q: Does city_category has relation with Purchase amount of individual customers?

ggplot(cust_City,aes(City_Category,sumPurchase))+
  geom_boxplot(aes(fill=City_Category),alpha=0.25)+
  scale_y_continuous(label=comma)+
  coord_cartesian(ylim=c(0,7000000))+
  labs(title='Distribution of purchase amount of each City_Category')

##info: customers' purchase amount range is most wide in City_Category B, however City_Category A has more super shoppers.
##info:  City_Category C's customers spend the least and are the least likely be a super shopper.

##Q: Does total purchase amount of each City_Category has something to say?
sumPurEachCat <- cust_City%>%group_by(City_Category)%>%summarise(sumPurchase=sum(sumPurchase))
ggplot(sumPurEachCat,aes(City_Category,sumPurchase))+
  geom_bar(stat='identity',aes(fill=City_Category))+
  scale_y_continuous(label=comma)+
  labs(title='Total purchase amount of each City_Category')

##info: Eventhough City_Category B has fewer customers, it howevers contributes more revenue to this retailer.
##info: City_Category A has the fewest customers, around 1/3 the number of City_Category C, however City_Category A contributes almost the same amount of purchase amount. Maybe the super shoppers in City_Category A play a role!
##actn: On limited MKT budget, we need to focus on promotion to City_Category B's normal customers and City_Category A's super shoppers.

##Q: behaviors of customers in each city

cust_pur_city <- bf%>% group_by(User_ID,City_Category)%>%summarise(count=n(),amount=sum(Purchase))%>%arrange(desc(count))

table(cust_pur_city$City_Category[1:100])
## 
##  A  B 
## 67 33
ggplot(cust_pur_city,aes(User_ID,count))+
  geom_point(aes(col=City_Category,alpha=0.1,position='jitter'))+
  facet_wrap(~City_Category)+
  theme(axis.title.x=element_blank(),axis.text.x=element_blank(),axis.ticks.x=element_blank())+
  labs(title = "distribution of purchase items in each city")+
  theme(legend.position='none')

##info: 67% of top 100 shoppers are from city A.
##info: Customers in city C buy very few times.
##info: City A has many super shoppers.
##actn: find out the reason why city C buy less amount.

Stay_In_Current_City_Years

table(bf$Stay_In_Current_City_Years)
## 
##      0      1      2      3     4+ 
##  72725 189192  99459  93312  82889
#transformming values into integer for more convenient when modelling
order=c('0'=0,'1'=1,'2'=2,'3'=3,'4'=4,'4+'=5)
bf$Stay_In_Current_City_Years <- 
as.numeric(plyr::revalue(bf$Stay_In_Current_City_Years,order))
summary(bf$Stay_In_Current_City_Years)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.000   2.000   2.014   3.000   5.000
##Q: Does Stay_In_Current_City_Years variable has some information?
ggplot(bf, aes(as.factor(Stay_In_Current_City_Years)))+
  geom_bar(aes(fill=as.factor(Stay_In_Current_City_Years)))+
  scale_fill_brewer(palette=15)+
  labs(title = 'Customers Stay in Current City', y = 'Count', x = 'Stay in Current City', fill = 'Number of Years in Current City')

ggplot(bf, aes(City_Category,Stay_In_Current_City_Years))+
  geom_boxplot()

bf_Stay_Year <- bf%>% select(City_Category,Stay_In_Current_City_Years)%>% group_by(City_Category)%>% summarise(mean=mean(Stay_In_Current_City_Years), median=median(Stay_In_Current_City_Years))
print(bf_Stay_Year)
## # A tibble: 3 x 3
##   City_Category  mean median
##   <chr>         <dbl>  <dbl>
## 1 A              1.96      2
## 2 B              2.03      2
## 3 C              2.04      2
ggplot(bf,aes(as.factor(Stay_In_Current_City_Years),Purchase))+
  geom_histogram(stat = 'summary', fun.y='mean')

cor(bf$Purchase,bf$Stay_In_Current_City_Years)
## [1] 0.004750564
## It seems that there is small info in this variable. We only know customers staying in current city for just one year is the biggest part. 


stay_cities <- bf %>%
  group_by(City_Category, Stay_In_Current_City_Years) %>% summarise(count=n()) %>% mutate(Percentage=count/sum(count)*100)

ggplot(stay_cities,aes(City_Category,count, fill= as.factor(Stay_In_Current_City_Years)))+
  geom_bar(stat='identity')+scale_fill_brewer(palette = 2)+
  labs(title = "City Category + Stay in Current City", y = "Total Count (Years)", x = "City", fill = "Stay Years")

####The heights of the bars commonly represent one of two things: either a count of cases in each group, or the values in a column of the data frame. By default, geom_bar uses stat="bin". This makes the height of each bar equal to the number of cases in each group, and it is incompatible with mapping values to the y aesthetic. If you want the heights of the bars to represent values in the data, use stat="identity" and map a value to the y aesthetic.

Purchase

cust_purchase <- cust_City%>%ungroup()%>% select(User_ID,sumPurchase)

##Q: distribution of purchase amount?

ggplot(cust_purchase,aes(sumPurchase))+
  geom_density(adjust=1)+
  geom_vline(aes(xintercept=median(cust_purchase$sumPurchase)),col='blue',linetype='dotted')+
  geom_vline(aes(xintercept=mean(cust_purchase$sumPurchase)),col='red',linetype='dashed')+
  geom_text(aes(x=mean(cust_purchase$sumPurchase)),label=round(mean(cust_purchase$sumPurchase)), y=1.2e-06, color='red', angle=360, size=4, vjust=3, hjust=-.1)+
  geom_text(aes(x=median(cust_purchase$sumPurchase)),label=round(median(cust_purchase$sumPurchase)), y=1.2e-06, col='blue', angle= 360, size=4, vjust=0, hjust=-.1)+
  scale_x_continuous(name="Purchase Amount", limits=c(0,7500000),breaks=seq(0,7500000,by=1000000),expand=c(0,0), labels = comma)+
  scale_y_continuous(name="Density ", limits=c(0,.00000125), labels=scientific, expand= c(0,0))

##info: very right skewed, the mean and median are deviated from the peak of probability.
##actn: focus on high value shopper?

Marital status

mart_stat <- bf %>% select(User_ID,Marital_Status) %>% group_by(User_ID)%>% distinct()

#Q: What percentage of customers is married?
mean(mart_stat$Marital_Status)
## [1] 0.4199627
##info: arround 42% of customers has married.

##Q: relationship between marriage and city?

mart_city <- mart_stat%>%left_join(cust_City, by='User_ID') %>% group_by(City_Category, Marital_Status)%>% tally()

ggplot(mart_city, aes(City_Category,n, fill=as.factor(Marital_Status)))+
  geom_bar(stat='identity', col='black')+
  scale_fill_brewer(palette = 10)+
  labs(title="City + Marital Status",y="Total Count (Shoppers)",x='City',fill="Marital Status")

##info: city A has a higher percentage of unmarriage, it also has more big shopper.

##Q: Is "stay in current city" correlated with marital status?
mart_stay <- mart_stat%>% left_join(cust_City,by='User_ID')%>%group_by(Stay_In_Current_City_Years,Marital_Status)%>%tally()%>%mutate(percent=n/sum(n)*100)

ggplot(mart_stay, aes(Stay_In_Current_City_Years,n, fill=as.factor(Marital_Status)))+
  geom_bar(stat='identity')+
  scale_fill_brewer(palette = 15)+
  labs(y='Marital_Status', fill='Marital_Status')

Top shopper

top_shopper <- cust_pur_city%>% mutate(avg_amount=amount/count)%>%arrange(desc(avg_amount))

ggplot(top_shopper, aes(avg_amount))+
  geom_density()+
  geom_vline(xintercept=mean(top_shopper$avg_amount), linetype='dashed',col='red')+
  geom_text(aes(x=mean(top_shopper$avg_amount), label= round(mean(top_shopper$avg_amount)), y=0.0002, col='red', size=5, hjust=0.8))+
  theme(legend.position = 'none')

##Occupation

occu <- cust_pur_city[]%>%left_join(bf[,c(1,5)],by="User_ID")%>%distinct()

##Q: Which occupation buy the most?

occu_sum <- occu%>%group_by(Occupation)%>%summarise(amount=sum(amount), count=sum(count))%>%arrange(desc(amount))%>%mutate(percent=amount/sum(amount)*100)

ggplot(occu_sum) +
  geom_bar(stat='identity', aes(as.factor(reorder(Occupation,-amount)),percent, fill=as.factor(Occupation)))+
  theme(legend.position = 'none')+
  scale_y_continuous(label=comma)+
  labs(x='occupation', y='percent', title='total purchase amount by occupation')

sum(occu_sum$amount[1:5])/sum(occu_sum$amount)
## [1] 0.5250289
##info: occupation 4 buy the most and top 5 occupations contribute 52.5 % of the sales. 

Apriori Algorithm

Data Preprocessing

# Getting the dataset into the correct format

test <- bf%>%select(User_ID,Product_ID)%>%
  # Selecting the columns we will need
  group_by(User_ID)%>%
  # Grouping by "User_ID"
  arrange(User_ID)%>%
  # Arranging by "User_ID"
  mutate(id=row_number())

cust_prod <- bf%>%select(User_ID,Product_ID)%>%
  # Selecting the columns we will need
  group_by(User_ID)%>%
  # Grouping by "User_ID"
  arrange(User_ID)%>%
  # Arranging by "User_ID"
  mutate(id=row_number())%>%
  # Defining a key column for each "Product_ID" and its corresponding "User_ID" (Must do this for spread() to work properly)
  spread(User_ID,Product_ID)%>%
  # Converting our dataset from tall to wide format, and grouping "Product_IDs" to their corresponding "User_ID"
  t()
  # Transposing the dataset from columns of "User_ID" to rows of "User_ID"
  
cust_prod <- cust_prod[-1,]
# Now we can remove the Id row we created earlier for spread() to work correctly.
str(cust_prod)
##  chr [1:5891, 1:1025] "P00069042" "P00285442" "P00193542" "P00184942" ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : chr [1:5891] "1000001" "1000002" "1000003" "1000004" ...
##   ..$ : NULL
####最後剩下的檔案,, customer_id 為row_id, col_id 是product_bought_x

write.csv(cust_prod, file= 'customer_product.csv')
customer_product <- read.transactions('customer_product.csv', sep=',', rm.duplicates = TRUE)
## distribution of transactions with duplicates:
## items
##   46  126  163  202  258  272  285  307  310  316  319  327  330  334  340 
##    1    1    1    1    1    1    1    1    1    1    1    2    1    1    1 
##  344  345  348  354  357  373  393  402  408  419  437  441  449  450  452 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
##  454  456  459  465  466  467  475  476  477  481  487  491  495  498  507 
##    1    1    1    1    2    2    1    1    1    1    2    2    3    2    1 
##  523  524  526  527  528  530  531  532  533  535  537  538  539  540  545 
##    1    2    1    2    1    1    2    1    1    1    1    2    3    1    1 
##  546  548  549  553  554  555  556  558  563  566  567  570  572  574  575 
##    1    3    1    1    2    1    1    2    1    3    1    3    2    4    1 
##  577  578  580  583  584  586  588  589  590  591  592  593  594  595  597 
##    2    3    2    1    1    2    3    5    1    1    1    1    1    3    2 
##  598  601  602  604  607  608  610  612  613  614  615  616  617  618  619 
##    1    1    1    1    2    1    1    2    1    2    1    1    2    1    3 
##  620  623  625  632  633  634  635  638  640  641  642  643  644  645  646 
##    2    1    1    6    1    5    2    2    2    1    2    1    2    3    1 
##  647  648  653  654  657  658  659  661  662  663  664  665  666  667  668 
##    4    1    1    3    2    1    1    2    1    3    1    1    2    1    2 
##  669  670  671  672  674  676  677  678  679  681  682  683  685  686  687 
##    4    2    1    3    1    1    2    3    3    2    2    4    4    5    2 
##  688  689  690  691  692  694  695  697  698  699  700  702  703  704  705 
##    2    1    1    1    1    2    1    1    1    1    2    1    3    1    1 
##  706  707  708  709  710  712  713  714  715  716  717  718  719  720  721 
##    2    2    3    2    2    4    5    2    2    1    1    2    3    3    5 
##  722  723  724  725  726  727  728  729  730  732  733  734  735  736  737 
##    2    1    1    2    5    2    1    3    3    2    1    5    6    2    4 
##  738  739  740  741  742  743  744  745  747  748  749  750  751  752  753 
##    6    3    4    1    6    7    4    6    1    1    5    5    3    2    3 
##  754  755  756  757  758  759  760  761  763  764  765  766  767  768  769 
##    4    6    6    2    6    2    1    5    3    4    2    3    2    5    2 
##  770  771  772  773  774  775  776  777  778  779  780  781  782  783  784 
##    2    3    1    3    4    2    2    3    3    2    4    7    3    4    5 
##  785  786  787  788  789  790  791  792  793  794  795  796  797  798  799 
##    5    3    3    6    5    5    2    3    5    3    8    5    5    9    3 
##  800  801  802  803  804  805  806  807  808  809  810  811  812  813  814 
##    4    4    7    4    3    4    5    7    5    4    5    3    2    6    6 
##  815  816  817  818  819  820  821  822  823  824  825  826  827  828  829 
##    3   11    5   10    6    6    4    7    7    2    5    4    7    5    5 
##  830  831  832  833  834  835  836  837  838  839  840  841  842  843  844 
##    4    5    4    3    5    4   11    5    5    4    9    7    6    4    7 
##  845  846  847  848  849  850  851  852  853  854  855  856  857  858  859 
##    9   11    4    6   10    6   10    7   12   16   11    8    7    4   12 
##  860  861  862  863  864  865  866  867  868  869  870  871  872  873  874 
##    9   11   11    9    6   11   10    7    6    5   12    6    7    8   11 
##  875  876  877  878  879  880  881  882  883  884  885  886  887  888  889 
##    9    9    8    7    5    4   15   13   12    8    4    6   12   15   13 
##  890  891  892  893  894  895  896  897  898  899  900  901  902  903  904 
##   10   11   13    6   21    7   14    9    7   11   18    5   14   10    9 
##  905  906  907  908  909  910  911  912  913  914  915  916  917  918  919 
##   19   15   10   17   18   23    8   19   15   12   18   21   17   12   11 
##  920  921  922  923  924  925  926  927  928  929  930  931  932  933  934 
##   13   13   12   20   20   16   13   15   17   27   22   20   28   18   14 
##  935  936  937  938  939  940  941  942  943  944  945  946  947  948  949 
##   20   20   20   14   22   30   23   23   21   20   25   19   30   31   30 
##  950  951  952  953  954  955  956  957  958  959  960  961  962  963  964 
##   24   27   25   40   30   31   16   29   30   32   48   27   27   24   30 
##  965  966  967  968  969  970  971  972  973  974  975  976  977  978  979 
##   26   35   43   30   51   49   40   41   36   32   36   38   43   41   42 
##  980  981  982  983  984  985  986  987  988  989  990  991  992  993  994 
##   37   49   44   51   57   55   40   53   56   63   39   58   50   58   77 
##  995  996  997  998  999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 
##   74   72   72   84   74   66   77   85   93   79   94  118  122  104  121 
## 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 
##  113  120   78   77   55   37   20    7    5    1
####利用transaction func 讀取後的檔案,是以product_id 為col_name, customer_id 為row_id, 值為bool
##checking if the result is right
topSeller
summary(customer_product)
## transactions as itemMatrix in sparse format with
##  5892 rows (elements/itemsets/transactions) and
##  10539 columns (items) and a density of 0.008768598 
## 
## most frequent items:
## P00265242 P00110742 P00025442 P00112142 P00057642   (Other) 
##      1858      1591      1586      1539      1430    536489 
## 
## element (itemset/transaction) length distribution:
## sizes
##    6    7    8    9   10   11   12   13   14   15   16   17   18   19   20 
##    1    5    7   20   37   55   77   78  120  113  121  104  122  118   94 
##   21   22   23   24   25   26   27   28   29   30   31   32   33   34   35 
##   79   93   85   77   66   74   84   72   72   74   77   58   50   58   39 
##   36   37   38   39   40   41   42   43   44   45   46   47   48   49   50 
##   63   56   53   40   55   57   51   44   49   37   42   41   43   38   36 
##   51   52   53   54   55   56   57   58   59   60   61   62   63   64   65 
##   32   36   41   40   49   51   30   43   35   26   30   24   27   27   48 
##   66   67   68   69   70   71   72   73   74   75   76   77   78   79   80 
##   32   30   29   16   31   30   40   25   27   24   30   31   30   19   25 
##   81   82   83   84   85   86   87   88   89   90   91   92   93   94   95 
##   20   21   23   23   30   22   14   20   20   20   14   18   28   20   22 
##   96   97   98   99  100  101  102  103  104  105  106  107  108  109  110 
##   27   17   15   13   16   20   20   12   13   13   11   12   17   21   18 
##  111  112  113  114  115  116  117  118  119  120  121  122  123  124  125 
##   12   15   19    8   23   18   17   10   15   19    9   10   14    5   18 
##  126  127  128  129  130  131  132  133  134  135  136  137  138  139  140 
##   11    7    9   14    7   21    6   13   11   10   13   15   12    6    4 
##  141  142  143  144  145  146  147  148  149  150  151  152  153  154  155 
##    8   12   13   15    4    5    7    8    9    9   11    8    7    6   12 
##  156  157  158  159  160  161  162  163  164  165  166  167  168  169  170 
##    5    6    7   10   11    6    9   11   11    9   12    4    7    8   11 
##  171  172  173  174  175  176  177  178  179  180  181  182  183  184  185 
##   16   12    7   10    6   10    6    4   11    9    7    4    6    7    9 
##  186  187  188  189  190  191  192  193  194  195  196  197  198  199  200 
##    4    5    5   11    4    5    3    4    5    4    5    5    7    4    5 
##  201  202  203  204  205  206  207  208  209  210  211  212  213  214  215 
##    2    7    7    4    6    6   10    5   11    3    6    6    2    3    5 
##  216  217  218  219  220  221  222  223  224  225  226  227  228  229  230 
##    4    5    7    5    4    3    4    7    4    4    3    9    5    5    8 
##  231  232  233  234  235  236  237  238  239  240  241  242  243  244  245 
##    3    5    3    2    5    5    6    3    3    5    5    4    3    7    4 
##  246  247  248  249  250  251  252  253  254  255  256  257  258  259  260 
##    2    3    3    2    2    4    3    1    3    2    2    5    2    3    2 
##  261  262  264  265  266  267  268  269  270  271  272  273  274  275  276 
##    4    3    5    1    2    6    2    6    6    4    3    2    3    5    5 
##  277  278  280  281  282  283  284  285  286  287  288  289  290  291  292 
##    1    1    6    4    7    6    1    4    3    6    4    2    6    5    1 
##  293  295  296  297  298  299  300  301  302  303  304  305  306  307  308 
##    2    3    3    1    2    5    2    1    1    2    5    3    3    2    1 
##  309  310  311  312  313  315  316  317  318  319  320  321  322  323  325 
##    1    2    2    5    4    2    2    3    2    2    1    1    3    1    2 
##  326  327  328  330  331  333  334  335  336  337  338  339  340  342  343 
##    1    1    1    1    2    1    1    1    1    2    2    5    4    4    2 
##  344  346  347  348  349  351  353  354  355  356  357  358  359  360  361 
##    2    3    3    2    1    1    3    1    2    4    2    1    2    1    1 
##  362  363  364  366  367  368  371  372  377  378  379  380  381  382  383 
##    3    1    2    1    1    2    3    1    1    4    1    3    2    1    2 
##  384  385  387  390  391  392  393  400  402  405  406  407  408  409  410 
##    1    2    2    2    5    1    6    1    1    2    3    1    2    1    1 
##  411  412  413  415  417  418  421  423  424  427  428  430  431  432  433 
##    2    1    2    1    1    2    1    1    1    1    2    3    1    1    1 
##  434  435  436  437  439  441  442  445  447  448  450  451  453  455  458 
##    1    1    5    3    2    1    1    2    3    2    1    4    2    3    1 
##  459  462  467  469  470  471  472  476  477  479  480  485  486  487  488 
##    3    1    2    1    1    2    1    1    3    1    1    1    3    2    1 
##  490  492  493  494  495  497  498  499  501  502  518  527  530  534  538 
##    1    1    1    2    1    1    2    1    2    1    1    2    3    2    2 
##  544  548  549  550  558  559  560  566  569  571  573  575  576  584  588 
##    1    1    1    1    2    2    1    1    1    1    1    1    1    1    1 
##  606  617  623  632  652  668  671  677  680  681  685  691  695  698  706 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    2    1 
##  709  715  718  740  753  767  823  862  899  979 1025 1026 
##    1    1    1    1    1    1    1    1    1    1    1    1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   26.00   54.00   92.41  115.00 1026.00 
## 
## includes extended item information - examples:
##    labels
## 1 1000001
## 2 1000002
## 3 1000003
####The element length distribution 是被購買的次數, eg. 被買7次的product 有5種
## info: the mean purchased time is 92.41, however the median is 54, right skewed. 

itemFrequencyPlot(customer_product, topN=25)

?itemFrequencyPlot

Fit the model

The first value, lhs, corresponds to a grouping of items which the algorithm has pulled from the dataset.

The second value, rhs, corresponds to the value predicted by apriori to be purchased with items in the “lhs” category.

The third value, support is the number of transactions including that specific set of items divided by the total number of transactions. (As described earlier when we chose the parameters for Apriori.)

The fourth value, confidence is the % chance in which a rule will be upheld.

The fifth value, lift gives us the independance/dependence of a rule. It takes the confidence value and its relationship to the entire dataset into account.

The sixth and final value, count is the number of times a rule occured during the implementation of Apriori on our data.

rules <- apriori(data= customer_product, parameter= list(support=0.008, confidence= 0.8, maxtime= 0))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.8    0.1    1 none FALSE            TRUE       0   0.008      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 47 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[10539 item(s), 5892 transaction(s)] done [0.17s].
## sorting and recoding items ... [2099 item(s)] done [0.01s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [16.76s].
## writing ... [7 rule(s)] done [0.39s].
## creating S4 object  ... done [0.35s].
rules2 <- apriori(data= customer_product, parameter= list(support=0.008, confidence= 0.75, maxtime= 0))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##        0.75    0.1    1 none FALSE            TRUE       0   0.008      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 47 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[10539 item(s), 5892 transaction(s)] done [0.15s].
## sorting and recoding items ... [2099 item(s)] done [0.01s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [15.66s].
## writing ... [171 rule(s)] done [0.40s].
## creating S4 object  ... done [0.34s].
#### maxtime = 0 will allow our algorithim to run until completion with no time limit

##support = 0.008, confidence= 0.8
inspect(sort(rules, by='lift'))
##     lhs            rhs             support confidence     lift count
## [1] {P00032042,                                                     
##      P00057642,                                                     
##      P00102642,                                                     
##      P00145042} => {P00270942} 0.008655804  0.8793103 4.540663    51
## [2] {P00025442,                                                     
##      P00031042,                                                     
##      P00034742,                                                     
##      P00255842} => {P00145042} 0.008486083  0.8064516 3.433246    50
## [3] {P00003242,                                                     
##      P00130742,                                                     
##      P00237542} => {P00145042} 0.008316361  0.8032787 3.419738    49
## [4] {P00006942,                                                     
##      P00251242,                                                     
##      P00277642} => {P00145042} 0.009674134  0.8028169 3.417773    57
## [5] {P00034042,                                                     
##      P00112442,                                                     
##      P00112542} => {P00110742} 0.008146640  0.8135593 3.012880    48
## [6] {P00127642,                                                     
##      P00165442,                                                     
##      P00277442} => {P00110742} 0.008316361  0.8032787 2.974807    49
## [7] {P00051442,                                                     
##      P00112142,                                                     
##      P00112542,                                                     
##      P00270942} => {P00110742} 0.008146640  0.8000000 2.962665    48
plot(rules, method='graph')

##7 rules

##support = 0.008, confidence= 0.75
inspect(sort(rules2, by='lift'))
##       lhs            rhs             support confidence     lift count
## [1]   {P00221142,                                                     
##        P00249642} => {P00103042} 0.008146640  0.7619048 8.030667    48
## [2]   {P00002142,                                                     
##        P00103042,                                                     
##        P00147942} => {P00221442} 0.008146640  0.7500000 6.045144    48
## [3]   {P00032042,                                                     
##        P00057642,                                                     
##        P00102642,                                                     
##        P00145042} => {P00270942} 0.008655804  0.8793103 4.540663    51
## [4]   {P00062842,                                                     
##        P00127242,                                                     
##        P00243942} => {P00044442} 0.008486083  0.7575758 4.061544    50
## [5]   {P00030842,                                                     
##        P00057942,                                                     
##        P00355142} => {P00114942} 0.008486083  0.7936508 4.024260    50
## [6]   {P00030842,                                                     
##        P00147742,                                                     
##        P00303342} => {P00044442} 0.008146640  0.7500000 4.020928    48
## [7]   {P00030842,                                                     
##        P00147742,                                                     
##        P00270942} => {P00044442} 0.009674134  0.7500000 4.020928    57
## [8]   {P00002142,                                                     
##        P00030842,                                                     
##        P0097242}  => {P00114942} 0.008316361  0.7903226 4.007384    49
## [9]   {P00028542,                                                     
##        P00052842,                                                     
##        P00329542} => {P00114942} 0.008146640  0.7868852 3.989955    48
## [10]  {P00105142,                                                     
##        P00127842,                                                     
##        P00173042} => {P00112542} 0.008825526  0.7536232 3.989531    52
## [11]  {P00003442,                                                     
##        P00057542,                                                     
##        P00277642} => {P00000142} 0.008825526  0.7647059 3.987298    52
## [12]  {P00002142,                                                     
##        P00030842,                                                     
##        P00113142} => {P00114942} 0.008655804  0.7846154 3.978446    51
## [13]  {P00057642,                                                     
##        P00151742,                                                     
##        P00199442} => {P00270942} 0.008995248  0.7681159 3.966467    53
## [14]  {P00057642,                                                     
##        P00102642,                                                     
##        P00127742,                                                     
##        P00145042} => {P00270942} 0.008146640  0.7619048 3.934393    48
## [15]  {P00032042,                                                     
##        P00057642,                                                     
##        P00127642,                                                     
##        P00145042} => {P00270942} 0.008146640  0.7619048 3.934393    48
## [16]  {P00052842,                                                     
##        P00334242,                                                     
##        P00346242} => {P00114942} 0.009334691  0.7746479 3.927905    55
## [17]  {P00052842,                                                     
##        P00122442,                                                     
##        P00151742} => {P00114942} 0.008146640  0.7741935 3.925601    48
## [18]  {P00052842,                                                     
##        P00122442,                                                     
##        P00127842} => {P00114942} 0.008655804  0.7727273 3.918166    51
## [19]  {P00040742,                                                     
##        P00221442} => {P00270942} 0.008486083  0.7575758 3.912039    50
## [20]  {P00057642,                                                     
##        P00102642,                                                     
##        P00127642,                                                     
##        P00145042} => {P00270942} 0.008995248  0.7571429 3.909803    53
## [21]  {P00143642,                                                     
##        P00145042,                                                     
##        P00303342} => {P00270942} 0.008316361  0.7538462 3.892780    49
## [22]  {P00085942,                                                     
##        P00103042,                                                     
##        P00255842} => {P00270942} 0.008316361  0.7538462 3.892780    49
## [23]  {P00046742,                                                     
##        P00105142,                                                     
##        P00140742,                                                     
##        P00184942} => {P00270942} 0.008316361  0.7538462 3.892780    49
## [24]  {P00001042,                                                     
##        P00142142,                                                     
##        P00243942} => {P00114942} 0.008316361  0.7656250 3.882154    49
## [25]  {P00030842,                                                     
##        P00113142,                                                     
##        P00127642} => {P00114942} 0.008316361  0.7656250 3.882154    49
## [26]  {P00151742,                                                     
##        P00243942,                                                     
##        P00295942} => {P00270942} 0.008146640  0.7500000 3.872918    48
## [27]  {P00127342,                                                     
##        P00326742} => {P00114942} 0.009843856  0.7631579 3.869644    58
## [28]  {P00028542,                                                     
##        P00244142} => {P00114942} 0.008146640  0.7619048 3.863290    48
## [29]  {P00057942,                                                     
##        P00147942,                                                     
##        P00221542} => {P00034742} 0.008146640  0.7741935 3.839687    48
## [30]  {P00052842,                                                     
##        P00120042,                                                     
##        P00122442} => {P00114942} 0.008316361  0.7538462 3.822428    49
## [31]  {P00052842,                                                     
##        P00100442,                                                     
##        P00127842} => {P00114942} 0.008316361  0.7538462 3.822428    49
## [32]  {P00034742,                                                     
##        P00073642,                                                     
##        P00145042} => {P00031042} 0.008995248  0.7571429 3.819423    53
## [33]  {P00105142,                                                     
##        P00169742,                                                     
##        P00329542} => {P00114942} 0.008146640  0.7500000 3.802926    48
## [34]  {P00059442,                                                     
##        P00120042,                                                     
##        P00122442} => {P00114942} 0.008146640  0.7500000 3.802926    48
## [35]  {P00052842,                                                     
##        P00073842,                                                     
##        P00321742} => {P00114942} 0.008146640  0.7500000 3.802926    48
## [36]  {P00112442,                                                     
##        P00220342,                                                     
##        P00251242} => {P00028842} 0.008146640  0.7500000 3.770478    48
## [37]  {P00127642,                                                     
##        P00153842,                                                     
##        P0097242}  => {P00117442} 0.008486083  0.7812500 3.751528    50
## [38]  {P00101942,                                                     
##        P00147942,                                                     
##        P00258742} => {P00117442} 0.008146640  0.7619048 3.658633    48
## [39]  {P00025442,                                                     
##        P00031042,                                                     
##        P00034742,                                                     
##        P00255842} => {P00145042} 0.008486083  0.8064516 3.433246    50
## [40]  {P00003242,                                                     
##        P00130742,                                                     
##        P00237542} => {P00145042} 0.008316361  0.8032787 3.419738    49
## [41]  {P00006942,                                                     
##        P00251242,                                                     
##        P00277642} => {P00145042} 0.009674134  0.8028169 3.417773    57
## [42]  {P00003942,                                                     
##        P00057742,                                                     
##        P00221442} => {P00145042} 0.008655804  0.7968750 3.392477    51
## [43]  {P00006942,                                                     
##        P00046742,                                                     
##        P00277642} => {P00145042} 0.009164969  0.7941176 3.380738    54
## [44]  {P00148642,                                                     
##        P00226342,                                                     
##        P00270942} => {P00145042} 0.008486083  0.7936508 3.378750    50
## [45]  {P00006942,                                                     
##        P00120042,                                                     
##        P00277642} => {P00110942} 0.008146640  0.7619048 3.360137    48
## [46]  {P00003242,                                                     
##        P00111142,                                                     
##        P00127842} => {P00145042} 0.011201629  0.7857143 3.344963    66
## [47]  {P00116742,                                                     
##        P00248142,                                                     
##        P00271142} => {P00117942} 0.008655804  0.7611940 3.332062    51
## [48]  {P00226342,                                                     
##        P00244042,                                                     
##        P00251242} => {P00145042} 0.008486083  0.7812500 3.325957    50
## [49]  {P00070342,                                                     
##        P00120042,                                                     
##        P00243942} => {P00110942} 0.008825526  0.7536232 3.323614    52
## [50]  {P00110942,                                                     
##        P00154042,                                                     
##        P00191442} => {P00145042} 0.008316361  0.7777778 3.311175    49
## [51]  {P00046742,                                                     
##        P00128242,                                                     
##        P0097242}  => {P00110942} 0.008655804  0.7500000 3.307635    51
## [52]  {P00020342,                                                     
##        P00182242,                                                     
##        P00270942} => {P00145042} 0.008825526  0.7761194 3.304115    52
## [53]  {P00110942,                                                     
##        P00140742,                                                     
##        P00145742} => {P00145042} 0.008146640  0.7741935 3.295916    48
## [54]  {P00221142,                                                     
##        P00249742} => {P00145042} 0.008486083  0.7692308 3.274789    50
## [55]  {P00003242,                                                     
##        P00127842,                                                     
##        P00193542} => {P00145042} 0.008486083  0.7692308 3.274789    50
## [56]  {P00100442,                                                     
##        P00111142,                                                     
##        P00147942} => {P00057642} 0.009164969  0.7941176 3.271987    54
## [57]  {P00003242,                                                     
##        P00145442,                                                     
##        P00221442} => {P00145042} 0.009504413  0.7671233 3.265817    56
## [58]  {P00125942,                                                     
##        P00145442,                                                     
##        P00221442} => {P00145042} 0.008316361  0.7656250 3.259438    49
## [59]  {P00116842,                                                     
##        P00147942,                                                     
##        P00151742} => {P00058042} 0.009164969  0.7714286 3.255915    54
## [60]  {P00057742,                                                     
##        P00111942,                                                     
##        P00270942} => {P00145042} 0.008825526  0.7647059 3.255525    52
## [61]  {P00151742,                                                     
##        P00222942} => {P00145042} 0.008146640  0.7619048 3.243600    48
## [62]  {P00006942,                                                     
##        P00221442,                                                     
##        P00277642} => {P00145042} 0.008146640  0.7619048 3.243600    48
## [63]  {P00057742,                                                     
##        P00142142,                                                     
##        P00199442} => {P00145042} 0.008146640  0.7619048 3.243600    48
## [64]  {P00070042,                                                     
##        P00117942,                                                     
##        P00277642} => {P00145042} 0.011371351  0.7613636 3.241297    67
## [65]  {P00110942,                                                     
##        P00127942,                                                     
##        P00289242} => {P00145042} 0.008655804  0.7611940 3.240575    51
## [66]  {P00070042,                                                     
##        P00110842,                                                     
##        P00151742} => {P00145042} 0.008655804  0.7611940 3.240575    51
## [67]  {P00062842,                                                     
##        P00110742,                                                     
##        P00110942,                                                     
##        P00221442} => {P00046742} 0.008316361  0.7777778 3.234063    49
## [68]  {P00003942,                                                     
##        P00035842,                                                     
##        P00251242} => {P00145042} 0.008995248  0.7571429 3.223328    53
## [69]  {P00042142,                                                     
##        P00057642,                                                     
##        P00127942} => {P00046742} 0.009334691  0.7746479 3.221048    55
## [70]  {P00031042,                                                     
##        P00057642,                                                     
##        P00370242} => {P00058042} 0.008146640  0.7619048 3.215718    48
## [71]  {P00115142,                                                     
##        P00147942,                                                     
##        P00151742} => {P00058042} 0.008655804  0.7611940 3.212719    51
## [72]  {P00221142,                                                     
##        P00226342} => {P00145042} 0.008316361  0.7538462 3.209293    49
## [73]  {P00070042,                                                     
##        P00117942,                                                     
##        P00161442} => {P00145042} 0.008316361  0.7538462 3.209293    49
## [74]  {P00037142,                                                     
##        P00154042,                                                     
##        P00221442} => {P00145042} 0.008316361  0.7538462 3.209293    49
## [75]  {P00003242,                                                     
##        P00103042,                                                     
##        P00251242} => {P00145042} 0.008825526  0.7536232 3.208344    52
## [76]  {P00183342,                                                     
##        P00226342,                                                     
##        P00251242} => {P00145042} 0.009334691  0.7534247 3.207499    55
## [77]  {P00057742,                                                     
##        P00102642,                                                     
##        P00103042} => {P00145042} 0.009334691  0.7534247 3.207499    55
## [78]  {P00128942,                                                     
##        P00144642,                                                     
##        P00329542} => {P00057642} 0.010013578  0.7763158 3.198638    59
## [79]  {P00244042,                                                     
##        P00251842} => {P00046742} 0.008486083  0.7692308 3.198523    50
## [80]  {P00208342,                                                     
##        P00267542} => {P00145042} 0.008655804  0.7500000 3.192919    51
## [81]  {P00020342,                                                     
##        P00120042,                                                     
##        P00270942} => {P00145042} 0.008146640  0.7500000 3.192919    48
## [82]  {P00037142,                                                     
##        P00201342,                                                     
##        P00270942} => {P00145042} 0.008146640  0.7500000 3.192919    48
## [83]  {P00006942,                                                     
##        P00070042,                                                     
##        P00265242} => {P00145042} 0.008146640  0.7500000 3.192919    48
## [84]  {P00037142,                                                     
##        P00110942,                                                     
##        P00127842} => {P00145042} 0.009674134  0.7500000 3.192919    57
## [85]  {P00110942,                                                     
##        P00127942,                                                     
##        P00323942} => {P00145042} 0.008146640  0.7500000 3.192919    48
## [86]  {P00003942,                                                     
##        P00035842,                                                     
##        P00210042} => {P00145042} 0.008146640  0.7500000 3.192919    48
## [87]  {P00003242,                                                     
##        P00117442,                                                     
##        P00142142} => {P00145042} 0.008655804  0.7500000 3.192919    51
## [88]  {P00057742,                                                     
##        P00100842,                                                     
##        P00242742} => {P00145042} 0.008146640  0.7500000 3.192919    48
## [89]  {P00057642,                                                     
##        P00117442,                                                     
##        P00221442,                                                     
##        P00270942} => {P00145042} 0.008146640  0.7500000 3.192919    48
## [90]  {P00110742,                                                     
##        P00182742,                                                     
##        P00183342} => {P00046742} 0.008825526  0.7647059 3.179709    52
## [91]  {P00102642,                                                     
##        P00111142,                                                     
##        P00270942,                                                     
##        P00329542} => {P00057642} 0.008486083  0.7692308 3.169446    50
## [92]  {P00112542,                                                     
##        P00130742,                                                     
##        P00255842} => {P00058042} 0.008655804  0.7500000 3.165473    51
## [93]  {P00006942,                                                     
##        P00062842,                                                     
##        P00242742} => {P00046742} 0.008655804  0.7611940 3.165106    51
## [94]  {P00085942,                                                     
##        P00183342,                                                     
##        P0097242}  => {P00057642} 0.008316361  0.7656250 3.154589    49
## [95]  {P00058042,                                                     
##        P00102642,                                                     
##        P00127742} => {P00057642} 0.009843856  0.7631579 3.144424    58
## [96]  {P00110942,                                                     
##        P00130642,                                                     
##        P00329542} => {P00057642} 0.008655804  0.7611940 3.136332    51
## [97]  {P00100442,                                                     
##        P00102642,                                                     
##        P00145042,                                                     
##        P00270942} => {P00057642} 0.008655804  0.7611940 3.136332    51
## [98]  {P00169742,                                                     
##        P00187442,                                                     
##        P00242742} => {P00046742} 0.008316361  0.7538462 3.134553    49
## [99]  {P00034042,                                                     
##        P00052842,                                                     
##        P00070042} => {P00046742} 0.008825526  0.7536232 3.133626    52
## [100] {P00110742,                                                     
##        P00182342,                                                     
##        P00241642} => {P00057642} 0.008486083  0.7575758 3.121424    50
## [101] {P00169742,                                                     
##        P00183242,                                                     
##        P00184942} => {P00046742} 0.008146640  0.7500000 3.118560    48
## [102] {P00062842,                                                     
##        P00110742,                                                     
##        P00110942,                                                     
##        P00242742} => {P00046742} 0.009164969  0.7500000 3.118560    54
## [103] {P00003242,                                                     
##        P00028842,                                                     
##        P00237542,                                                     
##        P00270942} => {P00057642} 0.008655804  0.7500000 3.090210    51
## [104] {P00034742,                                                     
##        P00110942,                                                     
##        P00145042,                                                     
##        P00270942} => {P00057642} 0.008146640  0.7500000 3.090210    48
## [105] {P00034042,                                                     
##        P00112442,                                                     
##        P00112542} => {P00110742} 0.008146640  0.8135593 3.012880    48
## [106] {P00127642,                                                     
##        P00165442,                                                     
##        P00277442} => {P00110742} 0.008316361  0.8032787 2.974807    49
## [107] {P00051442,                                                     
##        P00112142,                                                     
##        P00112542,                                                     
##        P00270942} => {P00110742} 0.008146640  0.8000000 2.962665    48
## [108] {P00051442,                                                     
##        P00110742,                                                     
##        P00117942,                                                     
##        P00270942} => {P00112142} 0.008486083  0.7692308 2.944969    50
## [109] {P00128042,                                                     
##        P00313542} => {P00112142} 0.008316361  0.7656250 2.931165    49
## [110] {P00046742,                                                     
##        P00112542,                                                     
##        P00325742} => {P00025442} 0.008655804  0.7846154 2.914851    51
## [111] {P00000142,                                                     
##        P00070042,                                                     
##        P00183242} => {P00112142} 0.008655804  0.7611940 2.914201    51
## [112] {P00070042,                                                     
##        P00121142,                                                     
##        P00129542} => {P00110742} 0.008146640  0.7868852 2.914097    48
## [113] {P00025442,                                                     
##        P00057642,                                                     
##        P00110842,                                                     
##        P00140742} => {P00112142} 0.008486083  0.7575758 2.900349    50
## [114] {P00051442,                                                     
##        P00192542,                                                     
##        P00270942} => {P00112142} 0.008995248  0.7571429 2.898691    53
## [115] {P00025442,                                                     
##        P00190142,                                                     
##        P00221442} => {P00112142} 0.008995248  0.7571429 2.898691    53
## [116] {P00034042,                                                     
##        P00244142} => {P00110742} 0.008316361  0.7777778 2.880369    49
## [117] {P00070042,                                                     
##        P00113342,                                                     
##        P00144642} => {P00110742} 0.008316361  0.7777778 2.880369    49
## [118] {P00046742,                                                     
##        P00113142,                                                     
##        P00325742} => {P00025442} 0.008146640  0.7741935 2.876134    48
## [119] {P00051442,                                                     
##        P00110742,                                                     
##        P00112542,                                                     
##        P00270942} => {P00112142} 0.008146640  0.7500000 2.871345    48
## [120] {P00034042,                                                     
##        P00111742,                                                     
##        P0097242}  => {P00110742} 0.008146640  0.7741935 2.867095    48
## [121] {P00000142,                                                     
##        P00034042,                                                     
##        P00277442} => {P00110742} 0.008146640  0.7741935 2.867095    48
## [122] {P00034042,                                                     
##        P00118442,                                                     
##        P0097242}  => {P00110742} 0.008655804  0.7727273 2.861665    51
## [123] {P00052842,                                                     
##        P00112142,                                                     
##        P00173842} => {P00110742} 0.009674134  0.7702703 2.852566    57
## [124] {P00182742,                                                     
##        P00278242} => {P00110742} 0.008486083  0.7692308 2.848716    50
## [125] {P00025442,                                                     
##        P00034042,                                                     
##        P00111742} => {P00110742} 0.008486083  0.7692308 2.848716    50
## [126] {P00021742,                                                     
##        P00112142,                                                     
##        P00147942} => {P00110742} 0.009504413  0.7671233 2.840912    56
## [127] {P00057642,                                                     
##        P00105142,                                                     
##        P00127342} => {P00025442} 0.011541073  0.7640449 2.838432    68
## [128] {P00000142,                                                     
##        P00000642,                                                     
##        P00112542} => {P00110742} 0.008316361  0.7656250 2.835363    49
## [129] {P00004742,                                                     
##        P00034042,                                                     
##        P0097242}  => {P00110742} 0.008825526  0.7647059 2.831959    52
## [130] {P00046742,                                                     
##        P00193542,                                                     
##        P00325742} => {P00025442} 0.008146640  0.7619048 2.830481    48
## [131] {P00100442,                                                     
##        P00110942,                                                     
##        P00326742} => {P00025442} 0.008146640  0.7619048 2.830481    48
## [132] {P00110942,                                                     
##        P00199442,                                                     
##        P00326742} => {P00025442} 0.008146640  0.7619048 2.830481    48
## [133] {P00057942,                                                     
##        P00073842,                                                     
##        P00318742} => {P00110742} 0.009334691  0.7638889 2.828934    55
## [134] {P00025442,                                                     
##        P00102642,                                                     
##        P00105142,                                                     
##        P00112542} => {P00110742} 0.009334691  0.7638889 2.828934    55
## [135] {P00021742,                                                     
##        P00057942,                                                     
##        P00112142} => {P00110742} 0.009843856  0.7631579 2.826226    58
## [136] {P00110942,                                                     
##        P00184242,                                                     
##        P00323942} => {P00110742} 0.008146640  0.7619048 2.821586    48
## [137] {P00028842,                                                     
##        P00100442,                                                     
##        P00316642} => {P00110742} 0.008146640  0.7619048 2.821586    48
## [138] {P00046742,                                                     
##        P00112542,                                                     
##        P00159442} => {P00110742} 0.008146640  0.7619048 2.821586    48
## [139] {P00034042,                                                     
##        P00117242} => {P00110742} 0.008655804  0.7611940 2.818954    51
## [140] {P00034042,                                                     
##        P00111142,                                                     
##        P00128942} => {P00110742} 0.008655804  0.7611940 2.818954    51
## [141] {P00057942,                                                     
##        P00070042,                                                     
##        P00070342} => {P00110742} 0.009674134  0.7600000 2.814532    57
## [142] {P00000142,                                                     
##        P00112542,                                                     
##        P00216142} => {P00110742} 0.008486083  0.7575758 2.805554    50
## [143] {P00034042,                                                     
##        P00057942,                                                     
##        P00199442} => {P00110742} 0.008486083  0.7575758 2.805554    50
## [144] {P00003242,                                                     
##        P00034042,                                                     
##        P00057942} => {P00110742} 0.008486083  0.7575758 2.805554    50
## [145] {P00057942,                                                     
##        P00112542,                                                     
##        P00274242} => {P00110742} 0.008316361  0.7538462 2.791742    49
## [146] {P00053842,                                                     
##        P00057942,                                                     
##        P00277642} => {P00110742} 0.008316361  0.7538462 2.791742    49
## [147] {P00031042,                                                     
##        P00105142,                                                     
##        P00129542} => {P00110742} 0.008316361  0.7538462 2.791742    49
## [148] {P00025442,                                                     
##        P00059442,                                                     
##        P00111742,                                                     
##        P00114942} => {P00110742} 0.008316361  0.7538462 2.791742    49
## [149] {P00025442,                                                     
##        P00121642,                                                     
##        P00161442} => {P00110742} 0.008825526  0.7536232 2.790916    52
## [150] {P00070342,                                                     
##        P00144642,                                                     
##        P00182242} => {P00110742} 0.008825526  0.7536232 2.790916    52
## [151] {P00111142,                                                     
##        P00154042,                                                     
##        P00275842} => {P00110742} 0.009334691  0.7534247 2.790181    55
## [152] {P00003242,                                                     
##        P00025442,                                                     
##        P00034042} => {P00110742} 0.009334691  0.7534247 2.790181    55
## [153] {P00057942,                                                     
##        P00105142,                                                     
##        P00182242} => {P00110742} 0.010862186  0.7529412 2.788391    64
## [154] {P00051442,                                                     
##        P00059442,                                                     
##        P00110742,                                                     
##        P00111742} => {P00025442} 0.008146640  0.7500000 2.786255    48
## [155] {P00274142,                                                     
##        P00316642} => {P00110742} 0.008146640  0.7500000 2.777498    48
## [156] {P00046742,                                                     
##        P00057642,                                                     
##        P00222942} => {P00110742} 0.008146640  0.7500000 2.777498    48
## [157] {P00046742,                                                     
##        P00145742,                                                     
##        P0097242}  => {P00110742} 0.008146640  0.7500000 2.777498    48
## [158] {P00010742,                                                     
##        P00053842,                                                     
##        P00057942} => {P00110742} 0.008146640  0.7500000 2.777498    48
## [159] {P00034042,                                                     
##        P00105142,                                                     
##        P00121642} => {P00110742} 0.008146640  0.7500000 2.777498    48
## [160] {P00057942,                                                     
##        P00070342,                                                     
##        P00173842} => {P00110742} 0.008146640  0.7500000 2.777498    48
## [161] {P00057942,                                                     
##        P00101942,                                                     
##        P00112542} => {P00110742} 0.009164969  0.7500000 2.777498    54
## [162] {P00070042,                                                     
##        P00112542,                                                     
##        P00142142} => {P00110742} 0.008146640  0.7500000 2.777498    48
## [163] {P00057642,                                                     
##        P00057942,                                                     
##        P00184942,                                                     
##        P00270942} => {P00110742} 0.008655804  0.7500000 2.777498    51
## [164] {P00046742,                                                     
##        P00073842,                                                     
##        P00110942,                                                     
##        P00112542} => {P00110742} 0.008146640  0.7500000 2.777498    48
## [165] {P00051442,                                                     
##        P00111142,                                                     
##        P00112142,                                                     
##        P00270942} => {P00110742} 0.008146640  0.7500000 2.777498    48
## [166] {P00028842,                                                     
##        P00212942,                                                     
##        P00294542} => {P00265242} 0.008146640  0.7741935 2.455085    48
## [167] {P00046742,                                                     
##        P00145442,                                                     
##        P00294542} => {P00265242} 0.008146640  0.7619048 2.416116    48
## [168] {P00102342,                                                     
##        P00248742} => {P00265242} 0.009164969  0.7605634 2.411862    54
## [169] {P00057642,                                                     
##        P00213242,                                                     
##        P00278642} => {P00265242} 0.009164969  0.7605634 2.411862    54
## [170] {P00028842,                                                     
##        P00031042,                                                     
##        P00234842} => {P00265242} 0.008486083  0.7575758 2.402388    50
## [171] {P00003942,                                                     
##        P00010742,                                                     
##        P00182742} => {P00265242} 0.008146640  0.7500000 2.378364    48
plot(rules2, method='graph', max=25)

plot(rules2, method='grouped', max=25)

##171 rules

##interpretation: customer who bought P00221142,P00249642 bought P00221142,P00249642 76.19% of the time, given a support of 0.008.
#The size of the bubbles represents the support value of the rule and the fill/color represents the lift.